* This .do file produces the graphs for the blog post "What the FinCEN leaks reveal about the ongoing war on dirty money"

* Installs needed to run graphs
* -replace- lets the scheme install succeed when an older copy is already present
net install scheme-modern, from("https://raw.githubusercontent.com/mdroste/stata-scheme-modern/master/") replace
capture ssc install kountry
capture ssc install mmerge 
capture ssc install shp2dta
capture ssc install mif2dta
capture ssc install wbopendata
capture ssc install carryforward		// used when building the bank counts for Figure 2
capture ssc install coefplot			// used for the coefficient plot in Figure 2
capture ssc install spmap				// used for the map in Figure 3

* Working directory: the Figure 2 and Figure 3 inputs are read from here and
* the exported graphs and pc.dta are written here
cd "ENTER YOUR DIRECTORY HERE"
* e.g. D:\Dropbox\Research\Blogs\files for publication
***************************************
* Figure 1 - fines and SARs over time *
***************************************
* Folder holding the raw spreadsheets used for this figure. Edit this one line
* to point at your own copy of the inputs.
local input_dir "D:\Dropbox\Research\Blogs\input"

* Loading fincen staff numbers.
* Where the actual staff number is missing, it is filled with the rounded
* fitted value from a regression of actual numbers on what was requested in
* the president's budget.
import excel "`input_dir'\fincen.xlsx", sheet("Sheet1") firstrow clear
reg      actual requested
predict  ahat
replace  actual = round(ahat,1) if actual == .
tempfile staff
save    `staff', replace

* Loading more recent SARs data (2014-2019).
* Columns B-G are renamed to depository_sars2014 ... depository_sars2019, a
* single row of the sheet is kept, and that row is reshaped to one
* observation per year.
import excel "`input_dir'\Section_2_-_Depository_Institution_SARs.xls", sheet("Exhibit 1") cellrange(A12:I35) firstrow clear
local k = 2014
foreach var of varlist B-G {
	rename `var' depository_sars`k'
	local k = `k' + 1
}
drop     H I
keep     in 13
gen      id = 1
reshape  long depository_sars, i(id) j(year)
keep     year depository_sars
tempfile s1
save    `s1', replace

* Loading 2013 from a different file (same row position, value taken from column C)
import excel "`input_dir'\Section_2-Depository_Institution_SARs3.xls", sheet("Exhibit 1") cellrange(A14:J57) firstrow clear
keep     in 13
keep     C
gen      year = 2013
rename   C depository_sars
tempfile s2
save    `s2', replace

* Earlier data and then merging in the rest
import excel "`input_dir'\depository_sars_0313.xlsx", sheet("Sheet1") firstrow clear
append using      `s2'
append using      `s1'
mmerge year using `staff', type(1:1)											// Merging in staff numbers


* Generating SARs per worker
gen spw = (depository_sars) / actual
sort year

* Drawing Figure 1
twoway (line spw year if year >=2002) 													///
	, ytitle(SARs per FinCEN employee (banks)) 											///
	  xtitle("") 																		///
	  legend(off)																		///
	  xsize(8) 																			///
	  scale(1.3)																		///
	  graphregion(fcolor("250 250 250") lcolor("250 250 250"))							///
	  note("Sources: FinCEN SARs Statistics and Treasury budget summaries." 			///
	       "(https://www.fincen.gov/reports/sar-stats)"									///
	       "(https://home.treasury.gov/about/budget-financial-reporting-planning-and-performance/budget-requestannual-performance-plan-and-reports/budget-in-brief)"	///
	       , size(vsmall))
* Start the x axis at 2002 and label every second year up to 2020
gr_edit .xaxis1.plotregion.xscale.curmin = 2002
gr_edit .xaxis1.reset_rule 2002 2020 2 , tickset(major) ruletype(range) 
gr export "fincen_fig1c.png", as(png) width(3000) replace


		
*********************************************
* Figure 2 - correspondent banking coverage *
*********************************************

* CPI (2019): keep one score per country code and store it in tempfile cpi
* NOTE(review): presumably the Corruption Perceptions Index - confirm against the source file
import excel "CPI2019.xlsx", sheet("CPI2019") cellrange(A3:V183) firstrow clear
keep     ISO3 CPI
rename   ISO3 iso3
rename   CPI  cpi_2019
* Rows without a country code cannot be matched later, so they are dropped
drop if  iso3 == ""
tempfile cpi
save    `cpi', replace

* FATF blacklisting status from 2010 onwards.
* Collapsing with (max) leaves one row per country holding the largest value
* of f_black observed in those years.
* NOTE(review): presumably f_black is a 0/1 indicator, making this "ever blacklisted" - confirm
use "FATF_panel_all_year.dta", clear
keep if  year >=2010 
collapse (max) f_black, by(iso31661alpha3)
* Give the key its short name explicitly so the later merge on iso3 does not
* depend on variable-name abbreviation
rename   iso31661alpha3 iso3
tempfile f
save    `f', replace


* IMF Banking data
use "Download data in STATA.dta", clear

* Recode Kosovo before the country panels are built, so its rows are grouped
* under the code used in the rest of the script. Rows with a blank iso3 get a
* missing group id and would otherwise be carried forward as one mixed group.
replace iso3 = "RKS" if economy == "Kosovo, Rep. of"

* Number of banks in 2015: within each country, carry the latest earlier
* non-missing value forward when a year is missing, then keep 2015
egen   c = group(iso3)
tsset  c year
bysort c (year): carryforward s_institutions_A1, gen(num_banks)
keep if year == 2015
keep    iso3 num_banks
drop if iso3 ==""
tempfile b
save    `b', replace

* Income
* Loading World Bank data: NY.GNP.PCAP.CD is GNI per capita (Atlas method,
* current US$) and SP.POP.TOTL is total population. Only the log of 2018 GNI
* per capita is kept.
wbopendata, indicator(NY.GNP.PCAP.CD;SP.POP.TOTL) clear long
keep if year   == 2018
gen lnny = ln(ny_gnp_pcap_cd)
keep countrycode lnny
tempfile i
save `i', replace

* Starting with ICIJ banking connection dataset.
* Two summaries are built from it and stored in tempfiles r and ns.
import delimited "download_bank_connections.csv", varnames(1) clear 
tempfile r ns

* (r) Number of rows per counterpart country (entity_b_iso_code).
* NOTE(review): this sums one per row, so a bank (entity_b_id) that appears in
* several rows is counted several times - confirm whether distinct banks were intended
preserve
	keep entity_b_id entity_b_iso_code
	gen  num_reported_banks = 1
	collapse (sum) num_reported_banks, by(entity_b_iso_code)
	save    `r', replace
restore

* (ns) Number of rows per filer and counterpart country
gen num_sars = 1
collapse (sum) num_sars, by(filer_org_name_id entity_b_iso_code)
save    `ns', replace

* Using 2015 chips data on correspondent connections, with the bank counts
* merged in by country
use "num_correspondents_chips.dta", clear
mmerge iso3c using `b', umatch(iso3) type(n:1) unmatched(master)

* Bringing in reporting data
mmerge fid iso3c using `ns', umatch(filer_org_name_id entity_b_iso_code) type(n:1) 
replace num_sars = 0 if num_sars == . 

* Collapsing by country
collapse (sum) num_correspondents num_sars (mean) num_banks, by(iso3c)

mmerge iso3c using `r', type(1:1) umatch(entity_b_iso_code)

replace num_reported_banks = 0 if num_reported_banks == . 

* Shares of banks covered, capped at 1. Stata's min() ignores missing
* arguments, so min(., 1) evaluates to 1; the shares are therefore only
* computed where the ratio itself is defined, and are left missing otherwise
* (for example when there is no bank count for the country).
gen     per_correspondents = min(num_correspondents/num_banks,1) if !missing(num_correspondents/num_banks)
gen     per_reported       = min(num_reported_banks/num_banks,1) if !missing(num_reported_banks/num_banks)

save  "pc.dta", replace

* Log number of banks, used as a control below
generate lnbanks = ln(num_banks)
label variable per_correspondents "Proportion of banks with a correspondent connection to a US bank"

* Adding the country-level controls (income, cpi and fatf blacklisting),
* keeping every country already in memory whether or not it matches
mmerge iso3c using `i', umatch(countrycode) unmatched(master)
mmerge iso3c using `cpi', umatch(iso3) unmatched(master)
mmerge iso3c using `f', umatch(iso3) unmatched(master)

* Regression with robust standard errors, stored as k for plotting
regress per_reported per_correspondents lnny lnbanks f_black cpi_2019, vce(robust)
estimates store k

* Coefficient plot without the constant, with a reference line at zero
coefplot k, drop(_cons) xline(0) graphregion(fcolor("250 250 250") lcolor("250 250 250"))

* Replay the recorded graph edits, then export
graph play "t5.grec"
graph export "fincen_fig2b.png", as(png) width(3000) replace


******************************
* Fig 3 - map of connections *
******************************
* Rebuild the Stata versions of the shapefile from scratch
capture erase worlddata.dta
capture erase worldcoor.dta
shp2dta using ne_10m_admin_0_countries, data(worlddata) coor(worldcoor) genid(id)
use worlddata.dta, clear

* Attach the country-level coverage measures saved in pc.dta, whose key
* variable is iso3c; countries that appear only in pc.dta are dropped
rename ADM0_A3 countrycode
mmerge countrycode using pc, umatch(iso3c) type(n:1)
drop if _merge == 2

* The United States is left blank on the map
replace per_correspondents = . if countrycode == "USA"

* NOTE(review): the SUBREGION field may not contain the value "Oceania" (it
* looks like a region-level name), in which case that condition excludes
* nothing - confirm against the shapefile attributes
spmap per_correspondents using worldcoor.dta if ADMIN!="Antarctica" & SUBREGION != "Oceania" & SUBREGION != "Polynesia", id(id)  ///
	osize(vvvthin) ndsize(vvvthin) clmethod(custom) fcolor(Blues) clbreaks(0 .25 .50 .75 1)

* Replay the recorded graph edits, then export
graph play "t3.grec"
gr export "fincen_fig3.png", as(png) width(3000) replace


exit